{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Gunshot Detection Using ML\n",
    "## MSDS17001 MSDS17011 MSEE17001 PhDEE17004\n",
    "### Fawad Arshad, Jawad Arshad, Zeeshan Haider, Hazoor Ahmed"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### IMPORTING LIBRARIES"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn\n",
    "%matplotlib inline\n",
    "import librosa\n",
    "from tqdm import tqdm_notebook\n",
    "import os\n",
    "from sklearn.decomposition import PCA\n",
    "from sklearn.externals import joblib\n",
    "from sklearn.metrics import precision_recall_fscore_support, roc_auc_score, confusion_matrix\n",
    "from keras.models import Sequential\n",
    "from keras.layers import Dense, Dropout,Conv1D,MaxPool1D,Flatten,Conv2D,BatchNormalization,Activation\n",
    "from keras.optimizers import Adam\n",
    "from keras import models\n",
    "from keras.callbacks import ModelCheckpoint\n",
    "from sklearn import svm\n",
    "from sklearn.ensemble import IsolationForest\n",
    "from keras.models import Model, load_model\n",
    "from keras.layers import Input\n",
    "from keras import regularizers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Clip metadata table; later cells read its 'fname' and 'label' columns.\n",
    "ds = pd.read_csv('gunshot.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Creating the feature dataset from Kaggle and other sources and storing it on disk"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Build one flat feature vector per clip listed in `ds`, then cache features and labels on disk.\n",
    "SAMPLE_RATE = 22050\n",
    "size = 3 * SAMPLE_RATE  # clips are cropped or zero-padded to 3 seconds of samples\n",
    "N_CLIPS = 40000  # only the first N_CLIPS rows of `ds` are processed\n",
    "\n",
    "# Newer librosa releases expose the RMS energy feature as `rms`; older ones as `rmse`.\n",
    "rms_feature = getattr(librosa.feature, 'rms', None) or librosa.feature.rmse\n",
    "\n",
    "dataset = []\n",
    "for row in tqdm_notebook(range(N_CLIPS)):\n",
    "    clip_path = './BigDataset/{}'.format(ds['fname'].iloc[row])\n",
    "    loadedseries = librosa.core.load(clip_path, sr=SAMPLE_RATE, res_type='kaiser_fast')[0]\n",
    "    loadedsize = loadedseries.shape[0]\n",
    "    if loadedsize > size:\n",
    "        timeseries = loadedseries[:size]\n",
    "    else:\n",
    "        timeseries = np.zeros(size)\n",
    "        timeseries[:loadedsize] = loadedseries\n",
    "\n",
    "    # The mel spectrogram feeds two features, so it is computed once per clip.\n",
    "    mel = librosa.feature.melspectrogram(y=timeseries, n_mels=20)\n",
    "\n",
    "    # Audio is passed by keyword (`y=`) because recent librosa versions reject it positionally.\n",
    "    # Keep this order: it fixes the column layout of the saved feature matrix.\n",
    "    features = np.concatenate((\n",
    "        librosa.feature.chroma_cens(y=timeseries).ravel(),\n",
    "        librosa.feature.chroma_cqt(y=timeseries).ravel(),\n",
    "        librosa.feature.chroma_stft(y=timeseries, S=np.abs(librosa.stft(timeseries))).ravel(),\n",
    "        librosa.feature.delta(mel).ravel(),\n",
    "        mel.ravel(),\n",
    "        librosa.feature.mfcc(y=timeseries, n_mfcc=40).ravel(),\n",
    "        librosa.feature.poly_features(y=timeseries).ravel(),\n",
    "        rms_feature(y=timeseries).ravel(),\n",
    "        librosa.feature.spectral_bandwidth(y=timeseries).ravel(),\n",
    "        librosa.feature.spectral_centroid(y=timeseries).ravel(),\n",
    "        librosa.feature.spectral_contrast(y=timeseries).ravel(),\n",
    "        librosa.feature.spectral_flatness(y=timeseries).ravel(),\n",
    "        librosa.feature.spectral_rolloff(y=timeseries).ravel(),\n",
    "        librosa.feature.tempogram(y=timeseries, win_length=10).ravel(),\n",
    "        librosa.feature.tonnetz(y=timeseries).ravel(),\n",
    "        librosa.feature.zero_crossing_rate(y=timeseries).ravel(),\n",
    "    ))\n",
    "    dataset.append(features)\n",
    "\n",
    "\n",
    "dataset = np.array(dataset)\n",
    "np.save('dataset3secCompleteFeatures', dataset)\n",
    "\n",
    "\n",
    "def encode(x):\n",
    "    \"\"\"Return 0 for the label 'notAGunshot' and 1 for any other label.\"\"\"\n",
    "    return 0 if x == 'notAGunshot' else 1\n",
    "\n",
    "\n",
    "# `Series.as_matrix()` was removed in pandas 1.0; `.values` returns the same ndarray.\n",
    "labels = ds['label'].apply(encode)[:N_CLIPS].values\n",
    "np.save('labels', labels)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Creating the feature dataset from the MIVIA data and storing it on disk"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Extract one flat feature vector per file in ./train_gunshots/; every row is labelled 1.\n",
    "SAMPLE_RATE = 22050\n",
    "size = 3 * SAMPLE_RATE  # clips are cropped or zero-padded to 3 seconds of samples\n",
    "\n",
    "# Newer librosa releases expose the RMS energy feature as `rms`; older ones as `rmse`.\n",
    "rms_feature = getattr(librosa.feature, 'rms', None) or librosa.feature.rmse\n",
    "\n",
    "# os.listdir() order is arbitrary; sorting makes the row order of the saved matrix repeatable.\n",
    "files = sorted(os.listdir('./train_gunshots/'))\n",
    "\n",
    "dataset = []\n",
    "for fname in tqdm_notebook(files):\n",
    "    # NOTE(review): no res_type is passed here, while the BigDataset cell loads with\n",
    "    # res_type='kaiser_fast' -- confirm that the different resampler is intended.\n",
    "    loadedseries = librosa.core.load('./train_gunshots/' + fname, sr=SAMPLE_RATE)[0]\n",
    "    loadedsize = loadedseries.shape[0]\n",
    "    if loadedsize > size:\n",
    "        timeseries = loadedseries[:size]\n",
    "    else:\n",
    "        timeseries = np.zeros(size)\n",
    "        timeseries[:loadedsize] = loadedseries\n",
    "\n",
    "    # The mel spectrogram feeds two features, so it is computed once per clip.\n",
    "    mel = librosa.feature.melspectrogram(y=timeseries, n_mels=20)\n",
    "\n",
    "    # Audio is passed by keyword (`y=`) because recent librosa versions reject it positionally.\n",
    "    # Keep this order: it fixes the column layout of the saved feature matrix.\n",
    "    features = np.concatenate((\n",
    "        librosa.feature.chroma_cens(y=timeseries).ravel(),\n",
    "        librosa.feature.chroma_cqt(y=timeseries).ravel(),\n",
    "        librosa.feature.chroma_stft(y=timeseries, S=np.abs(librosa.stft(timeseries))).ravel(),\n",
    "        librosa.feature.delta(mel).ravel(),\n",
    "        mel.ravel(),\n",
    "        librosa.feature.mfcc(y=timeseries, n_mfcc=40).ravel(),\n",
    "        librosa.feature.poly_features(y=timeseries).ravel(),\n",
    "        rms_feature(y=timeseries).ravel(),\n",
    "        librosa.feature.spectral_bandwidth(y=timeseries).ravel(),\n",
    "        librosa.feature.spectral_centroid(y=timeseries).ravel(),\n",
    "        librosa.feature.spectral_contrast(y=timeseries).ravel(),\n",
    "        librosa.feature.spectral_flatness(y=timeseries).ravel(),\n",
    "        librosa.feature.spectral_rolloff(y=timeseries).ravel(),\n",
    "        librosa.feature.tempogram(y=timeseries, win_length=10).ravel(),\n",
    "        librosa.feature.tonnetz(y=timeseries).ravel(),\n",
    "        librosa.feature.zero_crossing_rate(y=timeseries).ravel(),\n",
    "    ))\n",
    "    dataset.append(features)\n",
    "\n",
    "\n",
    "dataset = np.array(dataset)\n",
    "np.save('datasetNewGUNSHOTS', dataset)\n",
    "\n",
    "# One positive label per extracted row, stored as a column vector.\n",
    "labels = np.ones(dataset.shape[0]).reshape(-1, 1)\n",
    "np.save('datasetNewGUNSHOTSlabels', labels)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Combining DataSet from all sources"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Features from './BigDataset/' and their 0/1 labels, saved together as\n",
    "# 'dataset3secCompleteFeatures' and 'labels'.\n",
    "dataset = np.load('dataset3secCompleteFeatures.npy')\n",
    "labels = np.load('labels.npy').ravel()\n",
    "\n",
    "# Features from './train_gunshots/' and their all-ones labels, saved together as\n",
    "# 'datasetNewGUNSHOTS' and 'datasetNewGUNSHOTSlabels'.\n",
    "datasetnew = np.load('datasetNewGUNSHOTS.npy')\n",
    "labelsnew = np.load('datasetNewGUNSHOTSlabels.npy').ravel()\n",
    "\n",
    "dataset = np.concatenate((dataset, datasetnew))\n",
    "labels = np.concatenate((labels, labelsnew))\n",
    "\n",
    "# Every feature row needs exactly one label; a length mismatch would silently\n",
    "# mislabel samples in the steps that follow.\n",
    "assert dataset.shape[0] == labels.shape[0], 'features/labels length mismatch'\n",
    "\n",
    "del datasetnew\n",
    "del labelsnew"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Apply PCA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Project the raw feature vectors onto 50 principal components.\n",
    "# random_state pins the randomized SVD solver so the projection (and the saved\n",
    "# PCA object) is the same on every run.\n",
    "# NOTE(review): the PCA is fitted on all rows before the train/val/test split, so\n",
    "# validation and test statistics influence the projection -- confirm this is acceptable.\n",
    "pca = PCA(n_components=50, copy=False, random_state=0)\n",
    "# copy=False allows the input array to be overwritten while fitting.\n",
    "dataset = pca.fit_transform(dataset)\n",
    "\n",
    "# Labels become a column vector with one row per sample.\n",
    "labels = labels.reshape(-1, 1)\n",
    "\n",
    "joblib.dump(pca, 'PCAOBJECTNEW.pkl')\n",
    "np.save('PCAFeaturesNEW', dataset)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Train / validation / test split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Per-class split with fixed sizes; each split is shuffled and saved to disk.\n",
    "NEG_TRAIN, NEG_VAL, NEG_TEST = 22500, 8500, 887  # rows with label 0\n",
    "POS_TRAIN, POS_VAL, POS_TEST = 15000, 700, 137   # rows with a non-zero label\n",
    "\n",
    "split_features = np.asarray(dataset)\n",
    "split_labels = np.asarray(labels).reshape(-1, 1)\n",
    "assert split_features.shape[0] == split_labels.shape[0], 'features/labels length mismatch'\n",
    "\n",
    "# A stable sort keeps the original relative order inside each class. The class\n",
    "# boundary is taken from the labels themselves rather than a hard-coded offset.\n",
    "order = np.argsort(split_labels[:, 0], kind='mergesort')\n",
    "neg_idx = order[split_labels[order, 0] == 0]\n",
    "pos_idx = order[split_labels[order, 0] != 0]\n",
    "\n",
    "# Local generator: repeatable shuffles without reseeding NumPy's global RNG.\n",
    "split_rng = np.random.RandomState(0)\n",
    "\n",
    "\n",
    "def _take_split(neg_start, neg_count, pos_start, pos_count):\n",
    "    \"\"\"Return shuffled (x, y) made of the requested slice of each class.\n",
    "\n",
    "    Rows are selected by index, so features and labels stay aligned without\n",
    "    building a ragged array of (features, label) pairs.\n",
    "    \"\"\"\n",
    "    idx = np.concatenate((\n",
    "        pos_idx[pos_start:pos_start + pos_count],\n",
    "        neg_idx[neg_start:neg_start + neg_count],\n",
    "    ))\n",
    "    split_rng.shuffle(idx)\n",
    "    return split_features[idx], split_labels[idx]\n",
    "\n",
    "\n",
    "x_train, y_train = _take_split(0, NEG_TRAIN, 0, POS_TRAIN)\n",
    "x_val, y_val = _take_split(NEG_TRAIN, NEG_VAL, POS_TRAIN, POS_VAL)\n",
    "x_test, y_test = _take_split(NEG_TRAIN + NEG_VAL, NEG_TEST, POS_TRAIN + POS_VAL, POS_TEST)\n",
    "\n",
    "np.save('x_train', x_train)\n",
    "np.save('y_train', y_train)\n",
    "np.save('x_val', x_val)\n",
    "np.save('y_val', y_val)\n",
    "np.save('x_test', x_test)\n",
    "np.save('y_test', y_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### TRAINING MODELS"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### NEURAL NETWORK"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Fully connected binary classifier taking 50 input features.\n",
    "# Each hidden stage is Dense -> BatchNormalization -> sigmoid; dropout follows the\n",
    "# second and third stages.\n",
    "model = Sequential([\n",
    "    Dense(30, input_dim=50, kernel_initializer='he_normal'),\n",
    "    BatchNormalization(),\n",
    "    Activation('sigmoid'),\n",
    "\n",
    "    Dense(15, kernel_initializer='he_normal'),\n",
    "    BatchNormalization(),\n",
    "    Activation('sigmoid'),\n",
    "    Dropout(0.4),\n",
    "\n",
    "    Dense(5, kernel_initializer='he_normal'),\n",
    "    BatchNormalization(),\n",
    "    Activation('sigmoid'),\n",
    "    Dropout(0.4),\n",
    "\n",
    "    Dense(1, kernel_initializer='he_normal', activation='sigmoid'),\n",
    "])\n",
    "\n",
    "model.compile(\n",
    "    loss='binary_crossentropy',\n",
    "    optimizer='adam',\n",
    "    metrics=['accuracy']\n",
    ")\n",
    "history = model.fit(\n",
    "    x_train,\n",
    "    y_train,\n",
    "    batch_size=512,\n",
    "    epochs=50,\n",
    "    validation_data=(x_val, y_val),\n",
    "    verbose=0\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### CONVOLUTIONAL NEURAL NETWORK"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Conv1D expects (samples, steps, channels): add a trailing channel axis.\n",
    "# -1 lets NumPy infer the sample count, so the cell keeps working when the\n",
    "# split sizes change.\n",
    "x_traincnn = x_train.reshape((-1, 50, 1))\n",
    "x_valcnn = x_val.reshape((-1, 50, 1))\n",
    "x_testcnn = x_test.reshape((-1, 50, 1))\n",
    "\n",
    "\n",
    "# 1-D CNN: two conv blocks followed by two small dense blocks and a sigmoid output.\n",
    "# NOTE: this rebinds `model`, replacing the dense network built in the previous section.\n",
    "model = Sequential()\n",
    "model.add(Conv1D(12, 5, input_shape=(50, 1), kernel_initializer='he_normal'))\n",
    "model.add(BatchNormalization())\n",
    "model.add(Activation('sigmoid'))\n",
    "model.add(MaxPool1D(pool_size=2))\n",
    "model.add(Conv1D(6, 5, kernel_initializer='he_normal'))\n",
    "model.add(BatchNormalization())\n",
    "model.add(Activation('sigmoid'))\n",
    "model.add(Dropout(0.4))\n",
    "model.add(MaxPool1D(pool_size=2))\n",
    "model.add(Flatten())\n",
    "model.add(Dense(10, kernel_initializer='he_normal'))\n",
    "model.add(BatchNormalization())\n",
    "model.add(Activation('sigmoid'))\n",
    "model.add(Dropout(0.4))\n",
    "model.add(Dense(5, kernel_initializer='he_normal'))\n",
    "model.add(BatchNormalization())\n",
    "model.add(Activation('sigmoid'))\n",
    "model.add(Dense(1, activation='sigmoid'))\n",
    "\n",
    "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])\n",
    "\n",
    "# Keep only the weights with the lowest validation loss seen during training.\n",
    "call = ModelCheckpoint('CNNVALBESTNEW.h5', monitor='val_loss', save_best_only=True, verbose=0)\n",
    "history = model.fit(x_traincnn, y_train, batch_size=512, epochs=50,\n",
    "                    validation_data=(x_valcnn, y_val), verbose=0, callbacks=[call])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Anomaly Detection Methods"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Reload the splits that were saved to disk.\n",
    "x_train = np.load('x_train.npy')\n",
    "x_val = np.load('x_val.npy')\n",
    "x_test = np.load('x_test.npy')\n",
    "\n",
    "# Labels are flattened to 1-D so they can be used as boolean row masks below.\n",
    "y_train = np.load('y_train.npy').ravel()\n",
    "y_val = np.load('y_val.npy').ravel()\n",
    "y_test = np.load('y_test.npy').ravel()\n",
    "\n",
    "# The validation rows are appended to the training rows.\n",
    "x_train = np.concatenate((x_train, x_val))\n",
    "y_train = np.concatenate((y_train, y_val))\n",
    "\n",
    "# Separate the training rows by class.\n",
    "is_gunshot = (y_train == 1)\n",
    "is_not_gunshot = (y_train == 0)\n",
    "x_trainGunshots = x_train[is_gunshot]\n",
    "x_trainNotGunshots = x_train[is_not_gunshot]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Isolation Forest"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Fit an isolation forest (default settings) on the gunshot rows only, then persist it.\n",
    "clf = IsolationForest()\n",
    "clf = clf.fit(x_trainGunshots)  # fit() returns the estimator itself\n",
    "\n",
    "joblib.dump(clf, 'IF.pkl')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### One-Class SVM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# One-class SVM fitted on the gunshot rows only, then persisted.\n",
    "# The kernel was previously an empty string, which is not a valid kernel name and\n",
    "# makes fitting fail; 'linear' matches the name of the file the model is saved to.\n",
    "# `degree` only affects the 'poly' kernel and is kept for parity with the original call.\n",
    "clf = svm.OneClassSVM(nu=0.01, kernel='linear', degree=1)\n",
    "clf.fit(x_trainGunshots)\n",
    "\n",
    "joblib.dump(clf, 'SVMlinear.pkl')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### AutoEncoders"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Autoencoder trained to reconstruct the gunshot rows (input and target are the same array).\n",
    "input_dim = x_trainGunshots.shape[1]\n",
    "encoding_dim = 100\n",
    "bottleneck_dim = int(encoding_dim // 3)  # width of the two narrow middle layers\n",
    "\n",
    "input_layer = Input(shape=(input_dim, ))\n",
    "\n",
    "# Encoder: a wide layer with an L1 activity penalty, then the narrow layer.\n",
    "encoder = Dense(\n",
    "    encoding_dim,\n",
    "    kernel_initializer='he_normal',\n",
    "    activity_regularizer=regularizers.l1(10e-3)\n",
    ")(input_layer)\n",
    "encoder = Activation('relu')(encoder)\n",
    "encoder = Dense(bottleneck_dim, kernel_initializer='he_normal')(encoder)\n",
    "encoder = Activation('relu')(encoder)\n",
    "\n",
    "# Decoder: one narrow layer, then a linear layer back to the input width.\n",
    "decoder = Dense(bottleneck_dim, kernel_initializer='he_normal')(encoder)\n",
    "decoder = Activation('relu')(decoder)\n",
    "decoder = Dense(\n",
    "    input_dim,\n",
    "    activation='linear',\n",
    "    kernel_initializer='he_normal'\n",
    ")(decoder)\n",
    "\n",
    "autoencoder = Model(inputs=input_layer, outputs=decoder)\n",
    "autoencoder.compile(\n",
    "    optimizer='adam',\n",
    "    loss='mean_squared_error',\n",
    "    metrics=['accuracy']\n",
    ")\n",
    "\n",
    "# Save the model whenever the training loss improves.\n",
    "checkpointer = ModelCheckpoint(\n",
    "    filepath='autoencoderthird.h5',\n",
    "    verbose=0,\n",
    "    monitor='loss',\n",
    "    save_best_only=True\n",
    ")\n",
    "\n",
    "# Only the per-epoch metrics dict of the returned History object is kept.\n",
    "fit_result = autoencoder.fit(\n",
    "    x_trainGunshots,\n",
    "    x_trainGunshots,\n",
    "    epochs=100,\n",
    "    batch_size=32,\n",
    "    shuffle=True,\n",
    "    verbose=0,\n",
    "    callbacks=[checkpointer]\n",
    ")\n",
    "history = fit_result.history"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
